This version uses the sample number and barcode ID to select human cells from an initial clustering of all data aligned to the mixed human/mouse reference genome. Given this vector of IDs, a Python script finds the corresponding fastq read identifiers (only the bam file contains the “corrected” barcodes). The script then iterates through the paired fastq files, partitioning the matching (human) and non-matching (mouse) reads into separate fastq pairs. These are then run through cellranger count using only the appropriate genome.
Load human genome split of samples, labeling with hs1..hs4. Output matrix.
Convert human genes to their common mouse gene symbols and re-load into a Seurat object. Load the mouse genome split of the samples, labeling them ms1..ms4. Subset to overlapping genes. Merge all 8 samples. Normalize and scale. Show clusters labeled by cell and split by sample; also show nCount_RNA. Determine the list of genes that differ significantly between mouse and human.
#Build one Seurat object per sample, then merge the four samples per genome.
#
#load_sample() reads one cellranger filtered_feature_bc_matrix directory,
#prefixes every barcode with "<prefix><sample.id>_" and wraps the counts in a
#Seurat object.
#  genome          reference directory name under the working dir ("hg19"/"mm10")
#  prefix          cell-name prefix ("hs" for the human split, "ms" for mouse)
#  sample.id       sample number; used in the path (S<n>) and the cell prefix
#  fix.underscores if TRUE, replace "_" in gene symbols with "."
#Returns the Seurat object for that sample.
load_sample <- function(genome, prefix, sample.id, fix.underscores = FALSE) {
  counts <- Read10X(data.dir = paste0("./", genome, "/S", sample.id,
                                      "/outs/filtered_feature_bc_matrix/"))
  if (fix.underscores) {
    #some human gene symbols have underscores (but these are not in geneTrans).
    #Substitute a dot so as not to raise an error.
    rownames(counts) <- gsub("_", ".", rownames(counts))
  }
  #rename cells
  colnames(counts) <- paste(paste0(prefix, sample.id), colnames(counts), sep = "_")
  #create object
  CreateSeuratObject(counts = counts, project = "MG", min.cells = 5)
}
sample.ids <- 1:4
hs <- lapply(sample.ids, function(s) load_sample("hg19", "hs", s, fix.underscores = TRUE))
ms <- lapply(sample.ids, function(s) load_sample("mm10", "ms", s))
#Follow https://satijalab.org/seurat/essential_commands.html
hg <- merge(x = hs[[1]], y = hs[-1], project = "Hg")
mg <- merge(x = ms[[1]], y = ms[-1], project = "Mg")
#at this point we don't need all the original sample objects--can re-create if needed
rm(hs, ms, sample.ids)
#summary of each object
print(hg)
## An object of class Seurat
## 10930 features across 1683 samples within 1 assay
## Active assay: RNA (10930 features)
print(mg)
## An object of class Seurat
## 17264 features across 28218 samples within 1 assay
## Active assay: RNA (17264 features)
Output raw data to data frame. Use conversion table (geneTrans) to translate human symbols to mouse symbols.
#load gene translation table
geneTrans <- read.table("geneTrans.txt", sep = ",", header = TRUE,
                        stringsAsFactors = FALSE, row.names = 1)
#mitochondrial genes carry an "mt-" prefix in mouse and "MT-" in human;
#anchor on the dash in both so only those genes are picked up
mito.m <- grep("^mt-", rownames(mg), value = TRUE)
mito.h <- grep("^MT-", rownames(hg), value = TRUE)
#turns out, these two vectors (n=13 each) are ordered and match, build an extended translation table
#The pairing below is purely positional, so at least refuse to continue when
#the two vectors differ in length (e.g. a gene dropped by min.cells filtering).
#NOTE(review): equal length does not prove the order matches -- confirm by eye.
stopifnot(length(mito.m) == length(mito.h))
n.mito <- length(mito.m)
mitoTrans <- data.frame(row.names = paste("mm10", mito.m, sep = "_"),
                        Human.Symbol = mito.h,
                        Homologene_ID = rep(NA, n.mito),
                        None = rep("yes", n.mito),
                        Mouse.Symbol = mito.m,
                        hg19 = paste("hg19", mito.h, sep = "_"),
                        mm10 = paste("mm10", mito.m, sep = "_"),
                        stringsAsFactors = FALSE
)
rm(n.mito)
#extended translation table
xTrans <- rbind(geneTrans, mitoTrans)
#extract human raw counts into table
hg.raw <- GetAssayData(hg, slot = "counts")
#subset rows in geneTrans (not necessary but simpler)
#NOTE(review): an earlier note here said "cut from 13283 to 11364 rows"; that
#does not match the 10930-feature human object printed above, so it is
#presumably from a previous run.
hg.raw <- hg.raw[row.names(hg.raw) %in% xTrans$Human.Symbol, ]
#translate human symbols to mouse
#select the two symbol columns by name rather than by position so a reordered
#translation table cannot silently map the wrong columns
hg.trans <- merge(x = hg.raw, y = xTrans[, c("Human.Symbol", "Mouse.Symbol")],
                  by.x = 0, by.y = "Human.Symbol", all.x = TRUE)
#row names must be unique and non-missing; fail here with a clear message
#instead of inside the row-name assignment
stopifnot(!anyNA(hg.trans$Mouse.Symbol),
          anyDuplicated(hg.trans$Mouse.Symbol) == 0)
rownames(hg.trans) <- hg.trans$Mouse.Symbol
#drop the two key columns so only per-cell count columns remain
hg.trans <- hg.trans[, !(names(hg.trans) %in% c("Row.names", "Mouse.Symbol"))]
#extract mouse raw counts
mg.raw <- GetAssayData(mg, slot = "counts")
#subset mouse rows in xTrans only
mg.raw <- mg.raw[row.names(mg.raw) %in% xTrans$Mouse.Symbol, ]
#rebuild Seurat objects from the two symbol-matched count tables
#(`mg` is overwritten with its subsetted version)
hm <- CreateSeuratObject(counts = hg.trans, project = "MG", min.cells = 5)
mg <- CreateSeuratObject(counts = mg.raw, project = "MG", min.cells = 5)
print(hm)
## An object of class Seurat
## 9336 features across 1683 samples within 1 assay
## Active assay: RNA (9336 features)
print(mg)
## An object of class Seurat
## 14526 features across 28218 samples within 1 assay
## Active assay: RNA (14526 features)
#combine translated human cells and mouse cells into one object
hm <- merge(x = hm, y = mg, project = "HM")
#drop objects no longer needed (hg.trans and mg.raw are kept)
rm(hg, mg, hg.raw, mito.m, mito.h)
#summarize merged object
print(hm)
## An object of class Seurat
## 14725 features across 29901 samples within 1 assay
## Active assay: RNA (14725 features)
Use standard workflow to calculate percent.mito (now all mouse symbols) and visualize by sample.
#filter and normalize merged object
#all symbols are mouse symbols at this point, so mitochondrial genes are "mt-*"
mito.features <- grep(pattern = "^mt-", x = rownames(x = hm), value = TRUE)
#pull the count matrix once and reuse it for numerator and denominator
hm.counts <- GetAssayData(object = hm, slot = "counts")
#drop = FALSE keeps a matrix even if only one mitochondrial feature is present
#(otherwise the subset collapses to a vector and colSums fails)
percent.mito <- Matrix::colSums(x = hm.counts[mito.features, , drop = FALSE]) /
  Matrix::colSums(x = hm.counts)
rm(hm.counts)
#stored as a fraction (0-1), not a percentage
hm[['percent.mito']] <- percent.mito
VlnPlot(object = hm, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3)
FeatureScatter(object = hm, feature1 = "nCount_RNA", feature2 = "percent.mito")
FeatureScatter(object = hm, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
Print summary data before and after subsetting. Then normalize, find variable genes, and scale.
#object before filtering
print(hm)
## An object of class Seurat
## 14725 features across 29901 samples within 1 assay
## Active assay: RNA (14725 features)
#keep cells with 200 < detected genes < 2500 and under 5% mitochondrial counts
hm <- subset(x = hm, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mito < 0.05) #try > 100 & < 2500 & < .5
#object after filtering
print(hm)
## An object of class Seurat
## 14725 features across 8980 samples within 1 assay
## Active assay: RNA (14725 features)
hm <- NormalizeData(object = hm, normalization.method = "LogNormalize", scale.factor = 1e4)
#the run echoed below reports 2105 variable features (an older note said 4047)
hm <- FindVariableFeatures(object = hm, selection.method = 'mean.var.plot',
                           mean.cutoff = c(0.0125, 3), dispersion.cutoff = c(0.5, Inf))
length(VariableFeatures(hm))
## [1] 2105
#scale every gene while regressing out depth and mitochondrial fraction
hm <- ScaleData(object = hm, features = rownames(hm),
                vars.to.regress = c("nCount_RNA", "percent.mito")) #this takes a while
## Regressing out nCount_RNA, percent.mito
## Scaling data matrix
Start with PCA and determine how many dimensions are informative.
#PCA on the variable features only; verbose prints the top loadings per PC
hm <- RunPCA(object = hm, features = VariableFeatures(object = hm), verbose = TRUE)
## PC_ 1
## Positive: C1qb, C1qa, C1qc, B2m, Cx3cr1, Cd74, Fcer1g, Rpl13a, Lst1, Gpr34
## Spp1, Trem2, Uba52, Sat1, Apoc1, Cybb, Alox5ap, Rpl29, H2-Ea-ps, Rnaset2a
## Ptprc, Maf, C3, Ctss, Fos, Fcgr4, P2ry13, P2ry12, Olfml3, Cd84
## Negative: Gria2, Ptn, Meg3, Tsc22d1, Spock2, Pcp4l1, Crip2, Ahi1, Flt1, Meis2
## Nrxn3, Epha5, Cldn5, Igfbp7, Itm2a, Id3, Id1, Syt1, Snap25, Grin2b
## Sorbs2, Igf1r, Scg5, Epas1, Stmn3, Il1rapl1, Snhg11, Fry, R3hdm1, Ablim1
## PC_ 2
## Positive: Flt1, Cldn5, Itm2a, Igfbp7, Ptprb, Id1, Abcb1a, Pglyrp1, Klf2, Egfl7
## Cxcl12, Adgrf5, Slc2a1, Fn1, Ramp2, Sox18, Ablim1, Adgrl4, Ahnak, Sgms1
## Jcad, Esam, Epas1, Spock2, Pecam1, Abcg2, Pltp, Crip1, Kitl, Slc9a3r2
## Negative: Gria2, Meg3, Meis2, Syt1, Nrxn3, Atp1b1, Il1rapl1, Epha5, Grin2b, Celf4
## Ahi1, Stmn3, Snap25, Scg5, Negr1, Ndrg4, Rtn1, Ncam1, Snhg11, Plppr4
## Arpp21, Bex2, Ank3, Peg3, Gad1, Synpr, Opcml, Pcsk2, Eml5, Mt3
## PC_ 3
## Positive: Syt1, Meg3, Snap25, Ndrg4, Celf4, Gad1, Snhg11, Camk2b, Plppr4, Synpr
## Atp2b1, Stmn3, Atp1a3, Ano3, Grin2b, Nrip3, Pcp4l1, Eml5, Bcl11a, Snca
## Gad2, Pcp4, C1qtnf4, Myt1l, Ccsap, Pcsk2, Nrxn3, Grin2a, Sipa1l1, Syt6
## Negative: Atp1a2, Slc1a2, Clu, Gpr37l1, Mt2, F3, Cldn10, Mt3, Ptn, Cxcl14
## Rorb, Slc7a10, Gjb6, Fxyd1, Tril, Aqp4, Fgfr3, Hes5, Slc6a11, Pla2g7
## Msmo1, Tmem47, Vegfa, Appl2, Fjx1, Gabrb1, Mlc1, Phkg1, Car2, Etnppl
## PC_ 4
## Positive: Hexb, Ctss, Ctsd, Fyb, Rnase4, Vsir, Tmem119, Lgmn, Selplg, Fcgr3
## Cd52, Ctsz, Cx3cr1, Mafb, C1qa, C1qc, Unc93b1, P2ry12, Tgfbr1, Ptpn18
## Arsb, Itgb5, Ssh2, Rhoh, Fcer1g, P2ry6, Man2b1, Arhgap45, C1qb, Pou2f2
## Negative: Spp1, Cd74, H2-Ea-ps, C3, Apoc1, Cybb, Tpm1, A2m, Neat1, S100a11
## Fcgr4, Ifi203, Arl5a, H2-DMb1, Rpl29, H2-Eb1, Olr1, 1500009L16Rik, H2-Bl, Folr2
## Sorl1, Rpl13a, Lilrb4a, Glipr1, Adam28, Fcgrt, Uba52, Atm, Clec7a, Tgoln2
## PC_ 5
## Positive: Mt3, Slc1a2, Mt2, Atp1a2, Clu, Cxcl14, Cldn10, F3, Gpr37l1, Gria2
## Rorb, Pla2g7, Gjb6, Gabrb1, Slc7a10, Aqp4, Slc6a11, Fgfr3, Fjx1, Tril
## Tmem47, Fxyd1, Mlc1, Hes5, Appl2, Tubb2b, Phkg1, Etnppl, Vegfa, Fut9
## Negative: Cldn11, Ermn, Tspan2, Mog, Tubb4a, Ugt8a, Mag, Ppp1r14a, Tmem88b, Cnp
## Opalin, Mal, Nkx6-2, Stmn4, Qdpr, Mobp, Cryab, Kctd13, Gjc3, Grb14
## Anln, Sept4, Hapln2, Ttll7, Enpp2, Efnb3, Slc24a2, Tmeff2, Edil3, Kcna1
#top 5 genes at each end of the first five (unprojected) components
print(x = hm[["pca"]], dims = 1:5, nfeatures = 5, projected = FALSE)
## PC_ 1
## Positive: C1qb, C1qa, C1qc, B2m, Cx3cr1
## Negative: Gria2, Ptn, Meg3, Tsc22d1, Spock2
## PC_ 2
## Positive: Flt1, Cldn5, Itm2a, Igfbp7, Ptprb
## Negative: Gria2, Meg3, Meis2, Syt1, Nrxn3
## PC_ 3
## Positive: Syt1, Meg3, Snap25, Ndrg4, Celf4
## Negative: Atp1a2, Slc1a2, Clu, Gpr37l1, Mt2
## PC_ 4
## Positive: Hexb, Ctss, Ctsd, Fyb, Rnase4
## Negative: Spp1, Cd74, H2-Ea-ps, C3, Apoc1
## PC_ 5
## Positive: Mt3, Slc1a2, Mt2, Atp1a2, Clu
## Negative: Cldn11, Ermn, Tspan2, Mog, Tubb4a
#loadings for PC1/PC2, then cells in the default reduction
VizDimLoadings(object = hm, dims = 1:2)
DimPlot(object = hm)
#project the reduction onto the full dataset
hm <- ProjectDim(object = hm)
## PC_ 1
## Positive: Tyrobp, C1qb, C1qa, C1qc, Aif1, B2m, Cx3cr1, Laptm5, Cd74, Fcer1g
## Rpl13a, Lst1, Gpr34, Ftl1, Cyba, Spp1, Trem2, Uba52, Csf1r, Sat1
## Negative: Gria2, Ptn, Sptbn1, Meg3, Selenow, Tsc22d1, Zbtb20, Tcf4, mt-Atp6, Bsg
## Mt1, Pcsk1n, Nfib, Spock2, Pbx1, Atp5j, Selenom, Pcp4l1, Elob, Xist
## PC_ 2
## Positive: Flt1, Cldn5, Itm2a, Igfbp7, Ptprb, Id1, Abcb1a, Pglyrp1, Klf2, Egfl7
## Cxcl12, Adgrf5, Slc2a1, Bsg, Fn1, Ramp2, Sox18, Ablim1, Adgrl4, Ahnak
## Negative: Gria2, Ckb, Meg3, Meis2, Dclk1, Ank2, Pcsk1n, Syt1, Nrxn3, Atp1b1
## Il1rapl1, Epha5, Grin2b, Nrxn1, Dlgap1, Celf2, Celf4, Ahi1, Cadm2, Stmn3
## PC_ 3
## Positive: Syt1, Meg3, Snap25, Ndrg4, Mef2c, Celf4, Gad1, Snhg11, Calm2, Camk2b
## Plppr4, Synpr, Atp2b1, Tmsb4x, Stmn3, Atp1a3, Tmsb10, Ano3, Grin2b, Rps19
## Negative: Atp1a2, Plpp3, Aldoc, Gja1, Slc1a2, Prdx6, Ntsr2, Slc1a3, Clu, Mt1
## Gpr37l1, Mt2, Mfge8, F3, Sparcl1, Atp1b2, Glul, Ptprz1, Id4, Cldn10
## PC_ 4
## Positive: Hexb, Ctss, Ctsd, Fyb, Rnase4, Vsir, Tmem119, Lgmn, Selplg, Fcgr3
## Rpl17, Cd52, Rps17, Ctsz, Serinc3, Sparc, Cx3cr1, Csf1r, Mafb, Cst3
## Negative: Spp1, Cd74, H2-Ea-ps, C3, Apoc1, Cybb, Tpm1, A2m, Neat1, S100a11
## Fcgr4, Ifi203, Arl5a, H2-DMb1, Rpl29, H2-Eb1, Olr1, 1500009L16Rik, H2-Bl, Folr2
## PC_ 5
## Positive: Gja1, Mt3, Slc1a2, Mt2, Aldoc, Atp1a2, S1pr1, Clu, Ntsr2, Cst3
## Sparcl1, Slc1a3, Plpp3, Atp1b2, Cxcl14, Dclk1, Id4, Prdx6, Chchd10, Ntm
## Negative: Cldn11, Ermn, Tspan2, Mog, Tubb4a, Ugt8a, Plp1, Mag, Ppp1r14a, Tmem88b
## Cnp, Opalin, Mal, Nkx6-2, Stmn4, Qdpr, Mobp, Cryab, Kctd13, Gjc3
#heatmaps of the 500 most extreme cells for PC1, then PC1-6
DimHeatmap(object = hm, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(object = hm, dims = 1:6, cells = 500, balanced = TRUE)
#JackStraw resampling and scoring for the first 20 PCs
hm <- JackStraw(object = hm, num.replicate = 100)
hm <- ScoreJackStraw(object = hm, dims = 1:20)
JackStrawPlot(object = hm, dims = 1:20)
## Warning: Removed 29460 rows containing missing values (geom_point).
#second view on how many PCs to keep
ElbowPlot(object = hm)
#start clustering
#PCs used for the neighbor graph and for both embeddings; defined once so the
#three steps cannot drift apart (adjust based on the plots above)
pc.dims <- 1:13
hm <- FindNeighbors(hm, dims = pc.dims)
## Computing nearest neighbor graph
## Computing SNN
hm <- FindClusters(hm, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8980
## Number of edges: 313442
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9679
## Number of communities: 12
## Elapsed time: 1 seconds
#cells per cluster
table(Idents(hm))
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 2188 1579 1334 1048 542 481 477 427 385 273 201 45
#tSNE embedding and plots
hm <- RunTSNE(hm, dims = pc.dims)
DimPlot(hm, reduction = 'tsne')
DimPlot(hm, reduction = 'tsne', split.by = 'orig.ident')
FeaturePlot(hm, features = 'Spp1')
FeaturePlot(hm, features = 'Hexb')
FeaturePlot(hm, features = 'nCount_RNA')
#UMAP embedding and the same plots
hm <- RunUMAP(hm, dims = pc.dims)
DimPlot(hm, reduction = 'umap')
FeaturePlot(hm, features = 'Spp1')
FeaturePlot(hm, features = 'Hexb')
#FeaturePlot(hm,features='nCount_RNA',split.by='orig.ident')
FeaturePlot(hm, features = 'nCount_RNA')
Find the marker genes of each cluster, so that the top 2 genes from the prior clustering can be used to identify the clusters of interest.
#positive markers only, for every cluster
hm.markers <- FindAllMarkers(object = hm, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
#ten highest avg_logFC markers per cluster
top_n(group_by(hm.markers, cluster), 10, avg_logFC)
## # A tibble: 120 x 7
## # Groups: cluster [12]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 2.70 0.987 0.234 0 0 Slc1a2
## 2 0 2.53 0.949 0.137 0 0 Aldoc
## 3 0 2.40 0.973 0.265 0 0 Mt2
## 4 0 2.39 0.941 0.127 0 0 Plpp3
## 5 0 2.36 0.889 0.107 0 0 Clu
## 6 0 2.35 0.986 0.322 0 0 Mt3
## 7 0 2.32 0.936 0.106 0 0 Gja1
## 8 0 2.29 0.851 0.071 0 0 Ntsr2
## 9 0 2.28 0.995 0.531 0 0 Mt1
## 10 0 2.20 0.975 0.288 0 0 Slc1a3
## # … with 110 more rows
#five highest per cluster: written to file, then shown as a paged table
write.csv(top_n(group_by(hm.markers, cluster), 5, avg_logFC), "ID Top 5 per cluster.csv", row.names = FALSE)
paged_table(as.data.frame(top_n(group_by(hm.markers, cluster), 5, avg_logFC)))
Use Spp1 and Hexb to identify cluster number for human and mouse microglia, respectively.
#clusters in which Spp1 is a positive marker (taken as human microglia)
humClus <- as.character(subset(hm.markers, gene == "Spp1")$cluster)
print(humClus)
## [1] "2"
#clusters in which Hexb is a positive marker (taken as mouse microglia)
musClus <- as.character(subset(hm.markers, gene == "Hexb")$cluster)
print(musClus)
## [1] "1" "11"
#each lookup can return zero or several clusters; stop early if either side is
#empty or the two sides share a cluster, since the comparison would be meaningless
stopifnot(length(humClus) > 0,
          length(musClus) > 0,
          length(intersect(humClus, musClus)) == 0)
diffGenes <- FindMarkers(hm, ident.1 = humClus, ident.2 = musClus, min.pct = 0.1)
#cross-tabulate significance (adjusted p <= 0.05) against |avg_logFC| >= 1
table(sig = diffGenes$p_val_adj <= 0.05, twofold = abs(diffGenes$avg_logFC) >= 1)
## twofold
## sig FALSE TRUE
## FALSE 20 0
## TRUE 1428 217
diffGenes %>% tibble::rownames_to_column() %>% filter(p_val_adj <= 0.05) %>% filter(abs(avg_logFC) >= 1) %>% paged_table
saveRDS(diffGenes, "by_id_diffGenes.rds")
#col.names = NA adds the blank header cell for the row-name (gene) column, so
#the header has as many fields as the data rows and the file reads as valid CSV
write.table(as.data.frame(diffGenes), "ID_diffGenes_csv.txt", sep = ",", quote = TRUE, col.names = NA)
#average expression per identity class (here the 12 clusters)
hm.mean <- AverageExpression(object = hm)
## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
#first rows of the gene-by-cluster table for the RNA assay
head(x = hm.mean[["RNA"]])
## 0 1 2 3 4 5
## A1bg 0.00000000 0.000000000 0.65321979 0.000000000 0.000000000 0.00000000
## A2m 0.07496938 0.005165434 5.83612846 0.005529554 0.024180524 0.03200524
## Aaas 0.03003902 0.051494882 0.09339857 0.090743325 0.004724759 0.09316861
## Aacs 0.21218134 0.047823888 0.04391915 0.052250458 0.075924614 0.04550186
## Aagab 0.09828363 0.282612763 0.06032349 0.120720215 0.157264117 0.12593642
## Aak1 0.56055607 0.296952853 2.20932446 0.496353622 1.334404149 0.31267420
## 6 7 8 9 10 11
## A1bg 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## A2m 0.01135664 0.02531972 0.30322714 0.03518736 0.03389049 0.06655353
## Aaas 0.04748904 0.03711213 0.09451190 0.05990435 0.03999296 0.08755801
## Aacs 0.05189994 0.00000000 0.01515292 0.00000000 0.13366719 0.00000000
## Aagab 0.09408680 0.07424196 0.11395543 0.03192328 0.05093002 0.00000000
## Aak1 0.78728846 1.48532449 0.42730432 0.41475770 0.73986350 0.66761264
#save the full table
write.csv(x = hm.mean[["RNA"]], file = "ID_hm_cluster_averages_csv.txt")
#keep the cluster assignments before the idents are overwritten
hm[["old.ident"]] <- Idents(object = hm)
#all cell names in the object
all.cells <- Cells(hm)
#cell names were prefixed hs*/ms* at load time, so the first letter gives the species
hg.cells <- all.cells[grepl("^h", all.cells)]
mm.cells <- all.cells[grepl("^m", all.cells)]
#relabel every cell by species
Idents(object = hm, cells = hg.cells) <- "Human"
Idents(object = hm, cells = mm.cells) <- "Mouse"
#sanity check: sample of origin against the new species ident
table(hm$orig.ident, Idents(hm))
##
## Mouse Human
## hs1 0 317
## hs2 0 232
## hs3 0 510
## hs4 0 307
## ms1 1684 0
## ms2 729 0
## ms3 3273 0
## ms4 1928 0
#tSNE labeled by species, then sequencing depth overall and per species
DimPlot(object = hm, label = TRUE, repel = TRUE, label.size = 8, reduction = "tsne") + NoLegend()
FeaturePlot(object = hm, features = 'nCount_RNA', reduction = "tsne")
FeaturePlot(object = hm, features = 'nCount_RNA', reduction = "tsne", split.by = "ident")
To test the level of detection in each cell, we’ll display the distribution of the number of detected genes (genes with a count greater than zero) per cell.
#number of detected genes (count > 0) per cell, as a vector named by cell
#hg.trans is a data frame of counts: compare, then sum the TRUE values per column
hg.nz <- colSums(hg.trans > 0)
#mg.raw is a large sparse matrix: `> 0` keeps it sparse and Matrix::colSums
#sums it directly, avoiding a dense genes-by-cells data frame
mg.nz <- Matrix::colSums(mg.raw > 0)
#check distributions (human solid, mouse dashed)
plot(density(log10(hg.nz)))
lines(density(log10(mg.nz)), lty = "dashed")
Finally, we’ll check whether any cell identifiers (sample_barcode) are found in common across the two species.
#Venn
#extract only sample_barcodes with more than 500 genes > 0
hg.x <- names(which(hg.nz > 500))
mg.x <- names(which(mg.nz > 500))
#strip off species code from sample index and assemble into list
#diffList=list(hg19=sapply(strsplit(hg.x,"_"),`[`,2),mm10=sapply(strsplit(mg.x,"_"),`[`,2))
#anchor on the leading character so only the species prefix can be removed,
#leaving "s<n>_<barcode>" identifiers that are comparable across species
diffList <- list(hg19 = unique(sub("^h", "", hg.x)), mm10 = unique(sub("^m", "", mg.x)))
#generate object
venn.plot <- venn.diagram(diffList, filename = NULL, euler.d = TRUE, scaled = TRUE, col = 'transparent',
                          alpha = .5, fill = c("cornflowerblue", "coral2"),
                          fontfamily = 'Helvetica', cat.fontfamily = 'Helvetica', cat.cex = 3, cex = 2)
png("ID Venn Overlap.png", width = 1200, height = 900)
grid.draw(venn.plot) #save to file
dev.off()
## png
## 2
grid.newpage()
#plot it in output
grid.draw(venn.plot)
#print the R version, platform and attached/loaded package versions for this run
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] VennDiagram_1.6.20 futile.logger_1.4.3 rmarkdown_1.12
## [4] dplyr_0.8.0.1 Seurat_3.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-137 tsne_0.1-3 bitops_1.0-6
## [4] RColorBrewer_1.1-2 httr_1.4.0 tools_3.5.3
## [7] utf8_1.1.4 R6_2.4.0 irlba_2.3.3
## [10] KernSmooth_2.23-15 lazyeval_0.2.2 colorspace_1.4-1
## [13] withr_2.1.2 npsurv_0.4-0 tidyselect_0.2.5
## [16] compiler_3.5.3 cli_1.1.0 formatR_1.6
## [19] plotly_4.9.0 labeling_0.3 caTools_1.17.1.2
## [22] scales_1.0.0 lmtest_0.9-36 ggridges_0.5.1
## [25] pbapply_1.4-0 stringr_1.4.0 digest_0.6.18
## [28] R.utils_2.8.0 pkgconfig_2.0.2 htmltools_0.3.6
## [31] bibtex_0.4.2 htmlwidgets_1.3 rlang_0.3.4
## [34] zoo_1.8-5 jsonlite_1.6 ica_1.0-2
## [37] gtools_3.8.1 R.oo_1.22.0 magrittr_1.5
## [40] Matrix_1.2-17 fansi_0.4.0 Rcpp_1.0.1
## [43] munsell_0.5.0 ape_5.3 reticulate_1.12
## [46] R.methodsS3_1.7.1 stringi_1.4.3 yaml_2.2.0
## [49] gbRd_0.4-11 MASS_7.3-51.1 gplots_3.0.1.1
## [52] Rtsne_0.15 plyr_1.8.4 parallel_3.5.3
## [55] gdata_2.18.0 listenv_0.7.0 ggrepel_0.8.0
## [58] crayon_1.3.4 lattice_0.20-38 cowplot_0.9.4
## [61] splines_3.5.3 SDMTools_1.1-221 knitr_1.22
## [64] pillar_1.3.1 igraph_1.2.4 future.apply_1.2.0
## [67] codetools_0.2-16 futile.options_1.0.1 glue_1.3.1
## [70] evaluate_0.13 lsei_1.2-0 metap_1.1
## [73] lambda.r_1.2.3 data.table_1.12.2 png_0.1-7
## [76] Rdpack_0.11-0 gtable_0.3.0 RANN_2.6.1
## [79] purrr_0.3.2 tidyr_0.8.3 future_1.12.0
## [82] assertthat_0.2.1 ggplot2_3.1.1 xfun_0.6
## [85] rsvd_1.0.0 survival_2.43-3 viridisLite_0.3.0
## [88] tibble_2.1.1 cluster_2.0.8 globals_0.12.4
## [91] fitdistrplus_1.0-14 ROCR_1.0-7